## Ehrlich 2007 IO

library(foreign) 
library(Amelia)

## Load the original replication data from the Stata file
e2007 <- read.dta(file = "E2007 IO Rep Data.dta")
head(e2007)
dim(e2007)

## Drop the ID variable `ctryn`
e2007[["ctryn"]] <- NULL

## Check the dimensions again after the drop.
## Recorded result: 23 variables, so no variable reduction is necessary
dim(e2007)

## Stage 3: Imputation
## What is the average percentage of missing data per variable?
## Count the missing values in each column of `x`.
##
## `x` is a data frame or a matrix. Returns an unnamed integer vector with
## one NA count per column, in column order.
NAs <- function(x) {
    ## is.na() on a data frame tests each column in its own type, so the
    ## frame is not coerced to a single-type matrix the way apply() would.
    ## as.integer() drops the column names and keeps the counts as integers.
    as.integer(colSums(is.na(x)))
}
## Number of missing values in each column
NAs(e2007)
## Mean share of missing values across columns, expressed in percent
100 * mean(NAs(e2007) / nrow(e2007))

## Thus: 5 imputations

## Already lags for tariff, left, nop, pres, dist3, bicam2, pr, pool

## Fix the RNG seed before imputing
## (R reads the literal 02136 as the number 2136)
set.seed(02136)

## Run Amelia: 5 imputations, "year" as the time variable and "ctry" as the
## cross-section variable, polytime = 3, lags of the listed variables, and
## a ridge prior (empri) set to 1% of the number of rows
e2007.out <- amelia(
    e2007,
    m = 5,
    ts = "year",
    cs = "ctry",
    polytime = 3,
    lags = c(
        "tariff", "left", "nop", "pres",
        "dist3", "bicam2", "pr", "pool"
    ),
    empri = 0.01 * nrow(e2007)
)

## Export the imputations in Stata format (separate = FALSE: not split
## into one file per imputation)
write.amelia(
    obj = e2007.out,
    file.stem = "E2007 IO Imp Data",
    format = "dta",
    separate = FALSE
)